t_prob and B_prob and eventual stabilitylibrary(knitr)
library(tidyverse)
f_p <- 'results-100.csv'
df_results <- read.csv(file.path(getwd(), 'outputs', f_p))
df_results <- df_results %>%
mutate(is_final_generation=ifelse(is_final_generation=="True", TRUE, FALSE))
## Warning: package 'bindrcpp' was built under R version 3.4.4
cat(paste0(length(unique(df_results$seed))), "simulations run")
## 100 simulations run
# Get stable trajectories
stables <- df_results %>%
mutate(klass=paste0('k-', k, 'c-', c, 'seed-', seed)) %>%
filter(is_final_generation) %>%
mutate(stable=ifelse(that_rate > 0.001 & that_rate < 0.999, TRUE, FALSE)) %>%
filter(stable)
stables <- stables$klass
that-rate trajectories# Plot all trajectories
n_samples <- 100
seed_samples <- sample(df_results$seed, n_samples)
df_results %>%
filter(seed %in% seed_samples) %>%
mutate(klass=paste0('k-', k, 'c-', c, 'seed-', seed),
num_generations=as.numeric(num_generations),
stable=ifelse(klass %in% stables, 'stable-optionality', 'conventionalized')) %>%
ggplot(aes(x=num_generations, y=that_rate, col=stable, group=klass)) +
geom_line(alpha=0.3) +
theme_classic() +
theme(legend.position="none") +
ggtitle("that-rate trajectories")
# Facet by initial B_prob, t_prob
n_samples <- 10
seed_samples <- sample(df_results$seed, n_samples)
df_results %>%
filter(seed %in% seed_samples) %>%
mutate(klass=paste0('k-', k, 'c-', c, 'seed-', seed),
num_generations=as.numeric(num_generations),
stable=ifelse(klass %in% stables, 'stable-optionality', 'conventionalized')) %>%
ggplot(aes(x=num_generations, y=that_rate, col=stable, group=klass)) +
geom_line(alpha=0.3) +
theme_classic() +
theme(legend.position="none") +
facet_grid(round(t_prob, 3)*round(B_prob, 3)~.) +
ggtitle(paste0(n_samples, " samples"))
r trajectoriesdf_results %>%
mutate(klass=paste0('k-', k, 'c-', c, 'seed-', seed),
num_generations=as.numeric(num_generations),
stable=ifelse(klass %in% stables, 'stable-optionality', 'conventionalized')) %>%
ggplot(aes(x=num_generations, y=r, col=stable, group=klass)) +
geom_line(alpha=0.3) +
theme_classic() +
theme(legend.position="none") +
ggtitle("r trajectories")
# Facet by initial B_prob, t_prob
df_results %>%
filter(seed %in% seed_samples) %>%
mutate(klass=paste0('k-', k, 'c-', c, 'seed-', seed),
num_generations=as.numeric(num_generations),
stable=ifelse(klass %in% stables, 'stable-optionality', 'conventionalized')) %>%
ggplot(aes(x=num_generations, y=r, col=stable, group=klass)) +
geom_line(alpha=0.3) +
theme_classic() +
theme(legend.position="none") +
facet_grid(round(t_prob, 3)*round(B_prob, 3)~.)
# R. Levy's preprocessing
# dat$stable <- with(dat,thatrate > 0.001 & thatrate < 0.999)
# dat <- subset(dat, ! (k==1.0 & c==0.0))
# dat.summary <- dat %>% group_by(k,c) %>%
# dplyr:::summarise(stable=mean(stable),r=mean(r))
# This preprocessing is only valid for stable optionality plot
df_preprocessed <- df_results %>%
filter(is_final_generation) %>%
mutate(stable = that_rate > 0.001 & that_rate < 0.999) %>%
filter(k != 1.0, c != 0.0) %>%
group_by(k, c) %>%
summarise(stable=mean(stable), r=mean(r))
df_preprocessed %>%
ggplot(aes(k,c)) +
geom_tile(aes(fill=stable),colour="white") +
labs(y=expression(paste("String length cost parameter ", c)), fill="stable\noptionality\nrate") +
theme_classic() +
scale_x_continuous(name=expression(paste("Nonuniformity penalization parameter ",k)),
breaks=seq(1,2,by=0.2))
## Distribution of marginal frequencies of optional marker t at fixed points with stable optionality
df_results %>%
filter(is_final_generation) %>%
mutate(stable=that_rate > 0.001 & that_rate < 0.999) %>%
filter(stable) %>%
ggplot(aes(x=that_rate,y=..density..)) +
geom_histogram(bins=42) +
scale_x_continuous(limits=c(-0.05,1.05)) +
ylab("Probability density") +
xlab(expression(paste("Marginal frequency of optional marker ",t))) +
theme_classic()
## Warning: Removed 1 rows containing missing values (geom_bar).
## distribution of correlations between phrase onset probability and t-rate at fixed points with stable optionality
df_results %>%
filter(is_final_generation) %>%
mutate(stable = that_rate > 0.001 & that_rate < 0.999) %>%
filter(stable) %>%
ggplot(aes(x=r, y=..density..)) +
geom_histogram(bins=42) +
scale_x_continuous(limits=c(-1.05,1.05)) +
ylab("Probability density") +
xlab("Pearson correlation between\nphrase onset & t probabilities") +
theme_classic()
df_results %>%
filter(is_final_generation) %>%
mutate(stable = that_rate > 0.001 & that_rate < 0.999) %>%
ggplot(aes(x=that_rate, y=r, col=stable)) +
geom_point(alpha=0.3) +
theme_classic()
Proportion stable?
df_results %>%
filter(is_final_generation) %>%
mutate(stable = that_rate > 0.001 & that_rate < 0.999) %>%
group_by(stable) %>%
summarise(n=n()) %>%
ungroup %>%
mutate(total=sum(n),
prop=n/total) %>%
ggplot(aes(x=stable, y=prop)) +
geom_bar(stat='identity') +
geom_text(aes(x=stable, y=prop, label=round(prop, 2)), nudge_y=0.025) +
ylim(0, 1) +
ggtitle("Proportion of simulations leading to stable optionality") +
theme_classic()
Descriptives
df_results %>%
mutate(stable = that_rate > 0.001 & that_rate < 0.999) %>%
filter(stable) %>%
select(num_generations) %>%
summary()
## num_generations
## Min. : 0.00
## 1st Qu.: 1.00
## Median : 2.00
## Mean : 5.36
## 3rd Qu.: 6.00
## Max. :100.00
df_results %>%
mutate(stable = that_rate > 0.001 & that_rate < 0.999) %>%
filter(stable) %>%
group_by(num_generations) %>%
summarise(cnt=n()) %>%
ggplot(aes(x=num_generations, y=cnt)) +
geom_bar(stat='identity') +
theme_classic()
df_results %>%
mutate(stable = that_rate > 0.001 & that_rate < 0.999) %>%
filter(stable) %>%
mutate(num_generations=num_generations+1) %>%
group_by(num_generations) %>%
summarise(cnt=n()) %>%
ggplot(aes(x=log(num_generations), y=log(cnt))) +
geom_point(stat='identity', size=2.5, alpha=0.4) +
geom_smooth(method='lm') +
theme_classic()
Relation between t_prob and eventual stability?
df_results %>%
filter(is_final_generation) %>%
mutate(stable = that_rate > 0.001 & that_rate < 0.999) %>%
filter(stable) %>%
ggplot(aes(x=num_generations, y=t_prob, col=k, size=c)) +
geom_point(alpha=0.3) +
theme(legend.position = 'none') +
theme_classic()
df_lm <- df_results %>%
filter(is_final_generation) %>%
mutate(stable = that_rate > 0.001 & that_rate < 0.999)
reg <- lm(r~num_generations+k*c+B_prob+t_prob+stable, data=df_lm)
summary(reg)
##
## Call:
## lm(formula = r ~ num_generations + k * c + B_prob + t_prob +
## stable, data = df_lm)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.17889 -0.21677 -0.02675 0.17935 1.56668
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.4942606 0.0196194 25.192 <2e-16 ***
## num_generations 0.0052020 0.0003527 14.750 <2e-16 ***
## k -0.1551294 0.0119264 -13.007 <2e-16 ***
## c 0.1878185 0.0144019 13.041 <2e-16 ***
## B_prob -0.2701854 0.0054637 -49.451 <2e-16 ***
## t_prob -1.1756335 0.0059721 -196.855 <2e-16 ***
## stableTRUE -0.2431126 0.0050611 -48.036 <2e-16 ***
## k:c -0.0829793 0.0092468 -8.974 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.335 on 44092 degrees of freedom
## Multiple R-squared: 0.5191, Adjusted R-squared: 0.519
## F-statistic: 6799 on 7 and 44092 DF, p-value: < 2.2e-16